////////////////////////////////////////////////////////////////////////////////
//////// PARLIAMENTARY ELECTIONS - ELECTORAL TURNOVERS /////////////////////////
////////////////////////////////////////////////////////////////////////////////

// This do-file associates each parliamentary election with a running variable and treatment variable for electoral turnovers. 

////////////////////////////////////////////////////////////////////////////////
//////// a. Defining an incumbent party for each election
////////////////////////////////////////////////////////////////////////////////

****	1.	Automatic assignment

// Load the cleaned parliamentary election database and restrict it to the
// identifiers, seat results and flag variables used in this do-file
use "$project_path/data/3_cleaned/parliamentary_elections", clear
keep Country Year Month Type_Election Date Source Total_Seats ///
	Party_* Seats_* Seat_Share_* flag*

// Store a snapshot of the trimmed database in a temporary file
tempfile parl_elec
save `parl_elec'

/*
We define the incumbent party as the party which won the previous consequential election, if it took place no more than 10 years before the election of interest. A consequential election is an election which:
	-	was not flagged as inconsequential
	-	was not followed by an election of the same type the same year
	-	was not a constituent election, as defined by V-Dem (V-Dem considers an election as constituent if it was only charged with drafting a new constitution, without further legislative power).
*/

// Within each country-year (observations ordered by month), mark every election
// except the last one of the group.
// NOTE(review): the definition of a consequential election mentions elections
// "of the same type", but the grouping here uses Country and Year only -
// confirm that Type_Election is meant to be ignored.
bysort Country Year (Month): generate not_last = 1 if _n < _N

// An election is consequential when none of the three markers is set, i.e.
// not_last, flag_inconsequential and flag_constituent are all missing
generate consequential = cond(not_last==. & flag_inconsequential==. & flag_constituent==., 1, .)

// For every election we look up to four observations back (forward) for the
// nearest consequential election of the same country and record its year.
// Looking backward, the top-ranked party of that election (Party_1) becomes the
// incumbent party, unless Party_1 and Party_2 hold the same number of seats, in
// which case no incumbent party is assigned automatically.
// The lookups rely on the current ordering of the observations.
generate incumbent_party = ""
generate last_consequential = .
generate next_consequential = .
forvalues step = 1/4 {
	// Candidate earlier observation: same country, consequential, and no closer match recorded yet
	local is_prev "Country[_n-`step']==Country & consequential[_n-`step']==1 & last_consequential==."
	// The party is assigned first, because filling last_consequential ends the backward search
	quietly replace incumbent_party = Party_1[_n-`step'] if `is_prev' & Seats_1[_n-`step']!=Seats_2[_n-`step']
	quietly replace last_consequential = Year[_n-`step'] if `is_prev'

	// Candidate later observation: same logic for the next consequential election
	quietly replace next_consequential = Year[_n+`step'] if Country[_n+`step']==Country & consequential[_n+`step']==1 & next_consequential==.
}

// Year of the immediately preceding / following election of the same country.
// Observations must be in chronological order within country, because the
// neighbouring observation is read through explicit subscripts.
// Both variables are built in a single pass each (instead of one non-quiet
// replace per observation), which keeps the log readable and runs faster.
// An out-of-range subscript (first/last observation) returns a missing value,
// so the country comparison fails there and the result stays missing.
sort Country Year Month
generate last_election = Year[_n-1] if Country[_n-1]==Country
generate next_election = Year[_n+1] if Country[_n+1]==Country

// Number of years between the election and the last consequential election
generate diff = Year - last_consequential
// The previous consequential election only defines the incumbent party when it
// took place at most 10 years earlier. A missing gap also satisfies diff>10
// (Stata treats missing values as larger than any number); this is spelled out
// explicitly in the condition.
replace incumbent_party = "" if diff>10 | missing(diff)

// Remove the elections that cannot be used:
//	- no results data (empty Source, or missing/zero seat share for Party_1)
//	- purely constituent elections
//	- elections with appointed members
drop if Source=="" | Seat_Share_1==. | Seat_Share_1==0 | flag_constituent==1 | flag_appointed==1

****	2.	Manual definition

/*
In some cases, we fail to automatically define the incumbent party (for example because the previous election was over 10 years before the considered election, because results data for the previous election is missing (for instance because it is the first election in the dataset), or because the previous election was a tie). In these cases, we attempt to manually define an incumbent party. Such manual definitions are stored in an Excel spreadsheet.
*/ 
	
// Park the election data, load the spreadsheet of hand-coded incumbent parties
// and merge the elections back in. The merge asserts that every spreadsheet
// row finds an election (only "using" and "match" outcomes are allowed).
tempfile elections_auto
save `elections_auto'
import excel "$project_path/data/1_input/manual_matches/parliamentary/manual_incumbent_parties.xlsx", clear firstrow
drop comment
merge 1:1 Country Year Month using `elections_auto', nogenerate assert(match using)

// A non-empty manual entry overrides the automatically defined incumbent party
replace incumbent_party = manual_incumbent_party if manual_incumbent_party!=""
drop manual_incumbent_party

// Keep the result in a temporary file for the party-matching step
tempfile parl_results
save `parl_results'

////////////////////////////////////////////////////////////////////////////////
//////// b. Matching incumbent parties with parties in the election
////////////////////////////////////////////////////////////////////////////////

/*
We then attempt to match the party defined as the incumbent party with one of the parties in the election, which will be defined as the party representing the incumbency. The party representative of the incumbency is the incumbent party if it took part in the election, the party which succeeded the incumbent party in case of a name change or merger, or the party which was supported by the incumbent party if it did not take part in the election itself.
In most cases, the representative of the incumbency can be selected automatically: a Python script compares the incumbent party with the list of parties competing in the election, and tries to find a match. If the match is fuzzy, it is checked manually. If no match could be performed automatically, we try to manually define a representative of the incumbency.
*/

****	1.	Automatic matching

// Load the stored result of the fuzzy match and attach it to the election
// data. The merge asserts a perfect one-to-one correspondence on
// Country, Year, Month and incumbent_party.
import excel "$project_path/data/1_input/manual_matches/parliamentary/representative_incumbency_fuzzy_match.xlsx", clear firstrow
keep Country Year Month incumbent_party party_matched
merge 1:1 Country Year Month incumbent_party using `parl_results', nogenerate assert(match)

// The matched party is the starting value for the representative of the incumbency
generate party_incumbency = party_matched

****	2.	Manual matching

// Some incumbent parties ran in the election but were not linked by the
// automatic matching. These links are coded by hand in an Excel spreadsheet,
// which is merged here; every spreadsheet row must match an election.
tempfile elections_fuzzy
save `elections_fuzzy'
import excel "$project_path/data/1_input/manual_matches/parliamentary/representative_incumbency_manual_match.xlsx", clear firstrow
merge 1:1 Country Year Month incumbent_party using `elections_fuzzy', nogenerate assert(match using)

// A non-empty manual match takes precedence over the automatic one
replace party_incumbency = party_matched_manual if party_matched_manual!=""
drop comment party_matched_manual

****	3.	Manual representative of the incumbency

// The representative of the incumbency is not always the incumbent party
// itself, for example when the incumbent party changed its name or backed
// another party. Those cases are coded by hand in an Excel spreadsheet, which
// is merged here; every spreadsheet row must match an election.
tempfile elections_matched
save `elections_matched'
import excel "$project_path/data/1_input/manual_matches/parliamentary/representative_incumbency_manual.xlsx", clear firstrow
merge 1:1 Country Year Month using `elections_matched', nogenerate assert(match using)

// A non-empty manual representative overrides the previous value
replace party_incumbency = manual_party_incumbency if manual_party_incumbency!=""
drop comment manual_party_incumbency

////////////////////////////////////////////////////////////////////////////////
//////// c. Defining the running variable
////////////////////////////////////////////////////////////////////////////////

// After defining the representative of the incumbency, we find the representative of the opposition, and define the running variable as the difference of their seat shares. 

****	1.	Special case: independents arriving 1st or 2nd

/*
We treat separately cases where independents arrive in first or second position. In these cases, we manually define the representative of the incumbency and the representative of the opposition. 
We also flag two cases where no running variable can be defined:
	-	nonpartisan elections
	-	previous elections "won" by independents
*/

// Merge the hand-coded spreadsheet covering elections with independents in
// first or second position; every spreadsheet row must match an election.
tempfile elections_representative
save `elections_representative'
import excel "$project_path/data/1_input/manual_matches/parliamentary/nonpartisans.xlsx", clear firstrow
merge 1:1 Country Year Month using `elections_representative', nogenerate assert(match using)

// The spreadsheet comment identifies the two situations without a running variable
generate nonpartisan = 1 if comment=="Nonpartisan election"
generate previous_indep = 1 if comment=="Previous election won by independents"

// Manual representative of the incumbency (when given) and of the opposition
replace party_incumbency = manual_incumbent_party if manual_incumbent_party!=""
rename manual_opposition_party party_opposition
drop manual_incumbent_party comment

****	2.	Baseline assignment

// Locate the representative of the incumbency and the (manually defined)
// representative of the opposition among the ranked parties Party_1 ... Party_74,
// and pick up the seat share stored at the same rank.
// NOTE(review): 74 is hard-coded - presumably the number of Party_* columns in
// the database; confirm if the input data changes.
generate rank_incumbency = .
generate rank_opposition = .
generate double share_incumbent = .
generate double share_opposition = .
forvalues rank = 1/74 {
	// Incumbency: record the rank on a name match, then copy the share of that rank
	quietly replace rank_incumbency = `rank' if party_incumbency==Party_`rank' & party_incumbency!=""
	quietly replace share_incumbent = Seat_Share_`rank' if rank_incumbency==`rank'

	// Opposition: same procedure
	quietly replace rank_opposition = `rank' if party_opposition==Party_`rank' & party_opposition!=""
	quietly replace share_opposition = Seat_Share_`rank' if rank_opposition==`rank'
}

// When no opposition rank was found from a manual definition, the opposition
// is the best-ranked party other than the representative of the incumbency:
// Party_1 if the incumbency ranks second or lower, Party_2 if it ranks first.
// NOTE(review): only the party name and seat share are filled in here;
// rank_opposition itself stays missing for these cases - confirm this is intended.
local auto_opp "rank_opposition==."
local trailing "rank_incumbency>1 & rank_incumbency!=. & `auto_opp'"
local leading "rank_incumbency==1 & `auto_opp'"

replace party_opposition = Party_1 if `trailing'
replace share_opposition = Seat_Share_1 if `trailing'

replace party_opposition = Party_2 if `leading'
replace share_opposition = Seat_Share_2 if `leading'

****	3.	Special case: the incumbent party won no seats although it was active at the moment of the election (the incumbent party is not in the list of results). This may occur for example when the source we use only documents parties which won seats at the election. In this case, we set the share of the incumbent to 0, and the share of the opposition to the share of the best ranked party. The list of these cases is documented in an excel spreadsheet.

// Merge the hand-coded list of elections in which the incumbent party won no
// seats; every row of the spreadsheet is marked and must match an election.
tempfile elections_shares
save `elections_shares'
import excel "$project_path/data/1_input/manual_matches/parliamentary/no_seats.xlsx", clear firstrow
generate incumbent_noseat = 1
merge 1:1 Country Year Month using `elections_shares', nogenerate assert(match using)

// For the marked elections: the incumbent share is 0, the opposition is the
// top-ranked party, and no party in the results represents the incumbency
replace share_incumbent = 0 if incumbent_noseat==1
replace share_opposition = Seat_Share_1 if incumbent_noseat==1
replace party_opposition = Party_1 if incumbent_noseat==1
replace party_incumbency = "" if incumbent_noseat==1
drop Comment incumbent_noseat

****	4.	Special case: the incumbent party won all seats

// When the incumbency ranks first and there is no second party (or the second
// party has a zero seat share), the incumbent share is set to 100 and the
// opposition share to 0
local sole_winner `"rank_incumbency==1 & (Party_2=="" | Seat_Share_2==0)"'
replace share_incumbent = 100 if `sole_winner'
replace share_opposition = 0 if `sole_winner'

****	5.	"Independents" parties

// The results report independents as one aggregate, but we treat each
// independent as a separate one-seat "party". Independents can therefore only
// be the opposition when parliament consists of the party of the incumbency
// and independents (or "others") alone.

// In that two-party case (second entry is the independents/others aggregate and
// there is no third entry), the opposition share becomes the share of a single
// seat: the aggregate seat share divided by its number of seats
replace share_opposition = Seat_Share_2/Seats_2 if inlist(party_opposition, "Independents", "independent", "Others") & Party_2==party_opposition & Party_3=="" & Seats_2!=0

* Other special cases

// Madagascar 2007: Party_1 is the incumbent, Party_2 is Independents and
// Party_3 is Others, so the opposition is taken from the fourth entry
local madagascar_2007 `"Country=="Madagascar" & Year==2007"'
replace party_opposition = Party_4 if `madagascar_2007'
replace share_opposition = Seat_Share_4 if `madagascar_2007'

// Thailand 2006: Party_1 is the incumbent and Party_2 stands for vacant seats,
// so the incumbent is given a share of 100 and the opposition a share of 0
local thailand_2006 `"Party_2=="Vacant" & Country=="Thailand" & Year==2006"'
replace share_incumbent = 100 if `thailand_2006'
replace share_opposition = 0 if `thailand_2006'

// Sanity check: once a third party exists, the opposition must never be one of
// the residual categories (independents or "Others"). A failure here points to
// a mistake at step 1.
foreach residual in "Independents" "Others" "independent" {
	count if party_opposition=="`residual'" & Party_3!=""
	assert r(N)==0
}

****	6.	Cases where we cannot define a running variable

// Every election for which no running variable could be coded is documented,
// with the reason, in an Excel spreadsheet. Each row of that spreadsheet is
// marked with no_runvar and must match an election in the data.
tempfile elections_before_runvar
save `elections_before_runvar'
import excel "$project_path/data/1_input/manual_matches/parliamentary/no_runvar.xlsx", clear firstrow
generate no_runvar = 1
merge 1:1 Country Year Month using `elections_before_runvar', nogenerate assert(match using)

****	7.	Definition of the running variable and checks

sort Country Year Month Type_Election

// Running variable: opposition share minus incumbent share, defined only when
// both shares are available
generate double runvar = share_opposition - share_incumbent if share_incumbent!=. & share_opposition!=.

// Seat shares aggregated over coalition partners carry rounding errors (around
// the 15th decimal) even though they are stored as doubles. To make sure exact
// ties end up with a running variable of exactly 0, we round, snap tiny values
// to 0, and then require that no non-zero value lies within 0.1 of zero.
replace runvar = round(runvar, 1e-12)
replace runvar = 0 if abs(runvar)<0.0001
assert !(abs(runvar)<0.1 & runvar!=0)

// Every election without a running variable must be documented: either listed
// in the no-runvar spreadsheet, nonpartisan, or following an election won by independents
assert !(runvar==. & no_runvar!=1 & nonpartisan!=1 & previous_indep!=1)

// Elections documented as having no running variable are set to missing
replace runvar = . if no_runvar==1

****	8.	Treatment variable

// Treatment indicator: 1 when the running variable is positive (the opposition
// share exceeds the incumbent share), 0 when it is negative. It is left
// missing for a perfect tie (runvar equal to 0) and when runvar is missing.
generate treatment = (runvar>0) if runvar!=0 & runvar!=.

// Treatment and running variable of the last / next consequential election.
// For every election we look up to four observations back (forward) for an
// observation of the same country that is consequential and whose year equals
// last_consequential (next_consequential), and copy its treatment and runvar.
// The lookups rely on chronological ordering within country.
//
// last_runvar and next_runvar are created as double: runvar is a double, and a
// float copy would silently lose precision (replace does not promote float
// variables to double).
sort Country Year Month
generate last_treatment = .
generate next_treatment = .
generate double last_runvar = .
generate double next_runvar = .
forvalues step = 1/4 {
	// Earlier observation that is the recorded last consequential election
	local is_prev "Country[_n-`step']==Country & last_consequential==Year[_n-`step'] & consequential[_n-`step']==1"
	quietly replace last_treatment = treatment[_n-`step'] if `is_prev'
	quietly replace last_runvar = runvar[_n-`step'] if `is_prev'

	// Later observation that is the recorded next consequential election
	local is_next "Country[_n+`step']==Country & next_consequential==Year[_n+`step'] & consequential[_n+`step']==1"
	quietly replace next_treatment = treatment[_n+`step'] if `is_next'
	quietly replace next_runvar = runvar[_n+`step'] if `is_next'
}

// Years elapsed since the most recent earlier election of the same country
// with treatment equal to 1. We scan up to 30 observations back, nearest
// first, and keep the first hit (later passes only fill values still missing).
sort Country Year Month
generate elapsed_last_treatment = .
forvalues back = 1/30 {
	quietly replace elapsed_last_treatment = Year - Year[_n-`back'] if Country[_n-`back']==Country & treatment[_n-`back']==1 & elapsed_last_treatment==.
}

////////////////////////////////////////////////////////////////////////////////
///////// Exporting results
////////////////////////////////////////////////////////////////////////////////

// Variables exported to the final dataset, in their final order.
// The list is stored with the plain macro-assignment form (no "=" sign): the
// "=" form evaluates a string expression, whose result is capped at the
// maximum string-expression length and can be truncated in older Stata versions.
local vars_to_keep Country Year Month Type_Election runvar treatment ///
	incumbent_party rank_incumbency party_incumbency share_incumbent ///
	rank_opposition party_opposition share_opposition ///
	last_consequential next_consequential last_election next_election ///
	last_treatment last_runvar next_treatment next_runvar ///
	elapsed_last_treatment flag*
keep `vars_to_keep'
order `vars_to_keep'

// Variable labels of the exported dataset.
// share_incumbent and share_opposition are built from the Seat_Share_*
// variables of parties, so their labels describe party seat shares.
label variable Month					"Month"
label variable Type_Election			"Election type"
label variable rank_incumbency			"Rank of the incumbency"
label variable incumbent_party			"Incumbent party"
label variable party_incumbency			"Party representing the incumbency"
label variable rank_opposition			"Rank of the opposition"
label variable share_incumbent			"Seat share of the party representing the incumbency"
label variable share_opposition			"Seat share of the party representing the opposition"
label variable runvar					"Running variable (baseline)"
label variable treatment				"Treatment (baseline)"
label variable flag_inconsequential		"Flags inconsequential elections"
label variable last_election			"Year of the previous election"
label variable next_election			"Year of the next election"
label variable last_consequential		"Year of the last consequential election"
label variable next_consequential		"Year of the next consequential election"
label variable last_treatment			"Treatment (baseline) at the previous consequential election"
label variable next_treatment			"Treatment (baseline) at the next consequential election"
label variable last_runvar				"Running variable (baseline) at the previous consequential election"
label variable next_runvar				"Running variable (baseline) at the next consequential election"
label variable elapsed_last_treatment	"Time elapsed since last treatment"

// Shrink storage types where this loses no information
compress

// Display the party names and the inconsequential-election note in
// 20-character columns
format incumbent_party party_incumbency party_opposition ///
	flag_inconsequential_note %20s

// Final ordering of the observations, then write the cleaned dataset
sort Type_Election Country Year Month
save "$project_path/data/3_cleaned/turnovers_parliamentary_electoral", replace
